Scenario

Within this chapter developmental plasticity was explored within Acanthochromis polyacanthus that were collected from two different regions (i.e., low-latitude, Cairns, and high-latitude, Mackay). Fish were held in common garden experiments at 28.5 C. Clutch data was collected along with parental morphometeric data to determine how fish from each population performed at 28.5 C, a temperature that was shown to produce similar absoluate aerobic scope performances in a previous study [link]. Once hatched offspring were placed into three different temperature treatments, 28.5, 30, and 31.5 C. After approximately 5-6 months offspring length and weight was measured, as well as CTmax and respiration during CTmax trials.

Load packages

library(tidyverse) # data manipulation
library(ggpubr) # figure arrangement 
library(brms) # Bayesian models
library(StanHeaders)# needed to run Bayesian models
library(rstan) # needed to run Bayesian models
library(standist) # needs to be installed 
library(bayesplot) # needed for MCMC diagnostics 
library(DHARMa) # model validation 
library(ggdist) # partial plots 
library(tidybayes) # partial plots 
library(broom.mixed) # model investigation
library(emmeans) # pairwise comparisons
library(rstanarm) # pairwise comparisons - need for emmeans
library(car) # for vif 
library(modelsummary) # descriptive statistics

Set working directory

knitr::opts_knit$set(root.dir=working.dir)

Import data

growth <- read_delim("./import_files/growth_data.txt", 
    delim = "\t", escape_double = FALSE, 
    col_types = cols(NOTES = col_skip(), 
        ...16 = col_skip(), ...17 = col_skip()), 
    trim_ws = TRUE) 

clutch_data <- read_delim("import_files/clutch_data_2022_2023.txt", 
    delim = "\t", escape_double = FALSE, 
    trim_ws = TRUE) %>% 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER))

Data manipulation

density <- count(growth, CLUTCH_NUMBER, TANK) |> 
  rename(DENSITY = n) |> 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER), 
         TANK = as.factor(TANK)) 
growth2 <- growth |> 
  mutate(CLUTCH_NUMBER = as.factor(CLUTCH_NUMBER), 
         MALE = as.factor(MALE), 
         FEMALE = as.factor(FEMALE), 
         REGION = as.factor(REGION), 
         POPULATION = as.factor(POPULATION), 
         DATE_OF_HATCH = as.Date(DATE_OF_HATCH, format = "%d/%m/%Y"), 
         DATE_SAMPLED = as.Date(DATE_SAMPLED, format = "%d/%m/%Y"), 
         DEV_TEMP = as.factor(DEV_TEMP), 
         TANK = as.factor(TANK), 
         REP = as.factor(REP), 
         LENGTH = as.numeric(LENGTH), 
         MASS = as.numeric(MASS), 
         FULTONK = (1000*MASS)/(LENGTH^3), # Adding FULTON'S K metric
         EXPERIMENT = as.factor(EXPERIMENT)) |> 
  full_join(select(clutch_data, c("CLUTCH_NUMBER",
                                   "MALE_STANDARD_LENGTH", 
                                   "MALE_MASS", 
                                   "MALE_LAT", 
                                   "MALE_LONG", 
                                   "FEMALE_STANDARD_LENGTH", 
                                   "FEMALE_MASS", 
                                   "FEMALE_LAT", 
                                   "FEMALE_LONG", 
                                   "CLUTCH_ORDER", 
                                   "DAYS_IN_TREATMENT", 
                                   "EGG_COUNT", 
                                   "HATCHING_SUCCESS")), by = "CLUTCH_NUMBER") |> 
  select(c(1:6,16:27,7:13,15,14)) |> 
  drop_na(EXPERIMENT) |>
  mutate(CLUTCH_ORDER = as.factor(CLUTCH_ORDER), 
         EXP_GROUP = as.factor(paste0(REGION,"_",DEV_TEMP))) |> 
  inner_join(density, by=c("CLUTCH_NUMBER","TANK")) |> 
  mutate(FEMALE_MASS = coalesce(FEMALE_MASS, MALE_MASS),
         TANK =as.numeric(as.character(TANK)),
         LEVEL = as.factor(case_when(TANK >= 1 & TANK <= 199 ~ 1,
                           TANK >= 200 & TANK <= 299 ~ 2,
                           TANK >= 300 & TANK <= 399 ~ 3,
                           TRUE ~ NA_real_))) |> 
  mutate(TANK = as.factor(TANK)) |> 
  filter(MASS < 4, na.rm=TRUE, 
         FULTONK < 0.079, 
         FULTONK > 0.01, 
         DENSITY > 3)  

In the code above a number of variables were centered because within these metrics the value 0 is meaningless. For example you cannot have a fish length or mass of 0. Therefore, the mean was subtracted for a given variable was subtracted by every value. The y-intercept for these values will therefore reflect the mean, and the slope can be interpreted as ‘y increases/decreases x amount for every 1-unit increase from the mean [INSERT METRIC]’.

Exploratory data analysis

LENGTH VS. MASS

GROUP COMPARISONS

NOTE: On some of the figures ylimits have been set and therefore outliers are hidden

distribution

Parent mass and length

male.mass.plot <- ggplot(growth2, aes(x=MALE_MASS, y=MASS)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Farther vs. offspring mass") +
  theme_classic() 

female.mass.plot <- ggplot(growth2, aes(x=FEMALE_MASS, y=MASS)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Mother vs. offspring mass") +
  theme_classic()  

male.length.plot <- ggplot(growth2, aes(x=MALE_STANDARD_LENGTH, y=LENGTH)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Farther vs. offspring length") +
  theme_classic() 

female.length.plot <- ggplot(growth2, aes(x=FEMALE_STANDARD_LENGTH, y=LENGTH)) + 
  geom_point(alpha = 0.5) + 
  ggtitle("Mother vs. offspring length") +
  theme_classic()  

ggarrange(male.mass.plot, female.mass.plot, male.length.plot, female.length.plot, 
          ncol=2, 
          nrow=2)

Age

## Female Mass

Female Latitude

Density

Removal of outliers

There seems to be three areas that standout as potentially possessing outliers:

  1. The single individual that is >4 grams. This individual is more than double the size of the average individual.
  2. The cluster of individuals between 1 and 2grams that are below ~25 mm. Also there is no observable trend in this points (i.e., they are not all form the same developmental temperature, clutch, or population, which could have explained why they performed differently).
  3. Individuals that are floating above the main cluster. These could be individuals that grew rapidly and outcompeted diblings in their tank, or perhaps their tank had low density to begin with.

Individuals identified in numbered points one and two are not unexpected. These data points will be double check to ensure they were entered correctly, if so they will be removed from the data set.

It looks like there are some issues with the model. Mainly around the presence of outliers. Some of these outliers could be seen in our explanatory data analysis. Let’s revisit our raw data to check and remove the presence of outliers.

growth3 <- growth2 |> 
  filter(MASS < 4, na.rm=TRUE, 
         FULTONK < 0.079, 
         FULTONK > 0.01, 
         DENSITY > 3)  

Descriptive statistics

MinMax <- function(x) paste0('[', min(x, na.rm = TRUE), ', ', max(x, na.rm = TRUE), ']') 
Range <- function(x) max(x, na.rm = TRUE) - min(x, na.rm = TRUE)
datasummary(REGION*(LENGTH + MASS) + (LENGTH + MASS) ~  median + MinMax + Range + Histogram, 
            data=growth3)
tinytable_86d91n8o0meys5oerg8k
REGION median MinMax Range Histogram
core LENGTH 31.47 [15.15, 46.82] 31.67 ▁▃▆▇▄▁
MASS 1.09 [0.0882, 3.106] 3.02 ▁▃▇▇▅▂▁
leading LENGTH 31.35 [16.16, 45.17] 29.01 ▁▃▆▇▅▂
MASS 1.07 [0.138, 3.528] 3.39 ▁▄▇▅▃▁
LENGTH 31.45 [15.15, 46.82] 31.67 ▁▃▆▇▄▁
MASS 1.08 [0.0882, 3.528] 3.44 ▁▃▇▅▃▁
datasummary(REGION * DEV_TEMP * (LENGTH + MASS) + (LENGTH + MASS) ~  median + MinMax + Range + Histogram, 
            data=growth3)
tinytable_0q2z9ccqd5558lo3w32o
REGION DEV_TEMP median MinMax Range Histogram
core 28.5 LENGTH 32.30 [16.9, 45.09] 28.19 ▁▂▄▇▄▂
MASS 1.17 [0.1226, 3.106] 2.98 ▂▅▇▅▂▁
30 LENGTH 31.29 [16.15, 46.82] 30.67 ▁▃▇▇▂▁
MASS 1.07 [0.1778, 2.7827] 2.60 ▁▂▇▇▅▃▁
31.5 LENGTH 30.42 [15.15, 40.03] 24.88 ▁▂▄▆▇▅▃▁
MASS 1.02 [0.0882, 2.79] 2.70 ▁▅▇▇▅▃▂▁▁▁
leading 28.5 LENGTH 31.95 [16.16, 45.17] 29.01 ▁▂▆▇▅▂
MASS 1.09 [0.138, 3.06] 2.92 ▁▃▇▆▅▃
30 LENGTH 32.35 [16.43, 42.88] 26.45 ▁▁▄▆▇▄▁
MASS 1.16 [0.3081, 2.6554] 2.35 ▁▄▇▇▇▅▂▁
31.5 LENGTH 30.01 [17.98, 42.35] 24.37 ▁▃▇▅▄▁▁
MASS 0.97 [0.1854, 3.528] 3.34 ▁▅▇▃▁
LENGTH 31.45 [15.15, 46.82] 31.67 ▁▃▆▇▄▁
MASS 1.08 [0.0882, 3.528] 3.44 ▁▃▇▅▃▁

note this also removes NA’s therefore the difference in data frames is 21 row:

  • 1 datum point with MASS greater than 4
  • 1 datum point with high value (removed via FULTONK being less than 0.01)
  • 2 data points where the tank density was 2
  • 9 data points with low values (removed via FULTONK being greater than 0.079)
  • 8 NA’s removed
  • Total: 21

Now Let’s look at out plots again

Fit model [random factors]

First we will place random factors only within out model and see if they do a good job explaining the variance within our model. We will also be include priors which will be based off of out length data.

Hypothesis test will include:

  1. Null model
  2. EGG_COUNT ~ 1 + (1| FEMALE) + (1| TANK)
  3. EGG_COUNT ~ 1 + (1| FEMALE) + (1| TANK) + (1| LEVEL)
  4. EGG_COUNT ~ 1 + (1| FEMALE) + (1| TANK) + (1| LEVEL) + (1| POPULATION)
  5. EGG_COUNT ~ 1 + (1| FEMALE) + (1| TANK) + (1| LEVEL) + (1| POPULATION)+ (1| CLUTCH_ORDER)
growth3 |> 
  group_by(REGION,DEV_TEMP) |> 
  summarise(mean(na.omit(MASS)), 
            sd(na.omit(MASS))) 
## `summarise()` has grouped output by 'REGION'. You can override using the
## `.groups` argument.

Priors

mass.priors.re <- prior(normal(1.13, 0.20), class="Intercept") +  
  prior(student_t(3, 0, 0.53), class = "sigma")

Models

Null

f.model.null <- bf(MASS ~ 1, 
                   family = gaussian()) 

model.null <- brm(f.model.null,
              data = growth3, 
              prior = mass.priors.re,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              sample_prior = "yes",
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
saveRDS(model.null, file="./02.mass_data_analysis_files/model.null.RDS")

Model1

f.model1 <- bf(MASS ~ 1 + (1|FEMALE) + (1|TANK), 
                          family=gaussian())

model1 <- brm(f.model1,
              data = growth3, 
              prior = mass.priors.re,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              sample_prior = "yes",
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
saveRDS(model1, file="./02.mass_data_analysis_files/model1.RDS")

Model2

f.model2 <- bf(MASS ~ 1 + (1|FEMALE) + (1|TANK) + (1| LEVEL), 
                          family=gaussian())

model2 <- brm(f.model2,
              data = growth3, 
              prior = mass.priors.re,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              sample_prior = "yes",
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
## Warning: There were 23 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
saveRDS(model2, file="./02.mass_data_analysis_files/model2.RDS")

Model3

f.model3 <- bf(MASS ~ 1 + (1| FEMALE) + (1| TANK) + (1|LEVEL) + (1| POPULATION), 
               family=gaussian())

model3 <- brm(f.model3,
              data = growth3, 
              prior = mass.priors.re,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              sample_prior = "yes",
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
## Warning: There were 26 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
saveRDS(model3, file="./02.mass_data_analysis_files/model3.RDS")

Model4

f.model4 <- bf(MASS ~ 1 + (1|FEMALE) + (1 |TANK) + (1| LEVEL) + (1| POPULATION)+ (1|CLUTCH_ORDER), 
                        family=gaussian())

model4 <- brm(f.model4,
              data = growth3, 
              prior = mass.priors.re,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE),  
              sample_prior = "yes",
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
## Warning: There were 21 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 24 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
saveRDS(model4, file="./02.mass_data_analysis_files/model4.RDS")

LOO

LOO(model.null,model1, model2,model3,model4) 
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'model2'. It is
## recommended to set 'moment_match = TRUE' in order to perform moment matching
## for problematic observations.
## Output of model 'model.null':
## 
## Computed from 1800 by 1961 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -1193.5 41.0
## p_loo         2.7  0.3
## looic      2387.0 82.0
## ------
## MCSE of elpd_loo is 0.0.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.9, 1.0]).
## 
## All Pareto k estimates are good (k < 0.69).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model1':
## 
## Computed from 1800 by 1961 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -940.0 42.3
## p_loo       101.0  5.1
## looic      1880.1 84.6
## ------
## MCSE of elpd_loo is 0.3.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.1]).
## 
## All Pareto k estimates are good (k < 0.69).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model2':
## 
## Computed from 1800 by 1961 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -940.1 42.3
## p_loo       100.6  5.1
## looic      1880.2 84.7
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.3, 1.1]).
## 
## Pareto k diagnostic values:
##                           Count Pct.    Min. ESS
## (-Inf, 0.69]   (good)     1960  99.9%   270     
##    (0.69, 1]   (bad)         1   0.1%   <NA>    
##     (1, Inf)   (very bad)    0   0.0%   <NA>    
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model3':
## 
## Computed from 1800 by 1961 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -939.1 42.3
## p_loo        99.4  5.0
## looic      1878.2 84.5
## ------
## MCSE of elpd_loo is 0.3.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.1, 1.1]).
## 
## All Pareto k estimates are good (k < 0.69).
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model4':
## 
## Computed from 1800 by 1961 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -939.2 42.4
## p_loo        98.9  5.1
## looic      1878.5 84.8
## ------
## MCSE of elpd_loo is 0.3.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.2]).
## 
## All Pareto k estimates are good (k < 0.69).
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##            elpd_diff se_diff
## model3        0.0       0.0 
## model4       -0.1       1.1 
## model1       -0.9       1.1 
## model2       -1.0       0.8 
## model.null -254.4      25.9

CLUTCH_ORDER does not improve the model. Therefore it will not be used moving forward.

Fit model [fixed factors]

Collinearity

col.lm <- lm(MASS ~ 1 + DEV_TEMP+REGION + 
                         scale(FEMALE_MASS, center=TRUE, scale=TRUE) + 
                         scale(DENSITY, center=TRUE, scale=TRUE) + 
                         scale(AGE_DAYS, center=TRUE, scale=TRUE) +
                         scale(LENGTH, center=TRUE, scale=TRUE), 
                data = growth3)
vif(col.lm)
##                                                     GVIF Df GVIF^(1/(2*Df))
## DEV_TEMP                                        1.053304  2        1.013068
## REGION                                          1.165551  1        1.079607
## scale(FEMALE_MASS, center = TRUE, scale = TRUE) 1.098656  1        1.048168
## scale(DENSITY, center = TRUE, scale = TRUE)     1.119457  1        1.058044
## scale(AGE_DAYS, center = TRUE, scale = TRUE)    1.219351  1        1.104242
## scale(LENGTH, center = TRUE, scale = TRUE)      1.142628  1        1.068938

Prior investigation

growth3 |> 
  group_by(REGION,DEV_TEMP) |> 
  summarise(mean(na.omit(MASS)), 
            sd(na.omit(MASS))) 
## `summarise()` has grouped output by 'REGION'. You can override using the
## `.groups` argument.

Priors

mass.priors <- prior(normal(1.13, 0.20), class="Intercept") +  
  prior(normal(0, 0.25), class="b") +
  prior(student_t(3, 0, 0.53), class = "sigma")

Models

Model3.1

Model

f.model3.1 <- bf(MASS ~ 1 + 
                         DEV_TEMP*REGION +  
                         scale(DENSITY, center=TRUE, scale=TRUE) + 
                         #scale(AGE_DAYS, center=TRUE, scale=TRUE) +
                         scale(LENGTH, center=TRUE, scale=TRUE) + 
                         (1| FEMALE) + (1| TANK) + (1|LEVEL) + (1| POPULATION), 
                         family=gaussian()) 

model3.1 <- brm(f.model3.1,
              data = growth3, 
              prior = mass.priors,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE), 
              sample_prior = "yes",
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
## Warning: There were 10 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: Examine the pairs() plot to diagnose sampling problems
saveRDS(model3.1, file="./02.mass_data_analysis_files/model3.1.RDS")

DHARMa

#--- distribution check ---#
pp_check(model3.1, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

#--- DHARMa checks ---#
preds <- posterior_predict(model3.1, ndraws=250, summary=FALSE)
model3.1_resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = growth3$MASS, 
                              fittedPredictedResponse = apply(preds, 2, mean), 
                              integerResponse = 'student')
plot(model3.1_resids) ; testDispersion(model3.1_resids)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.90702, p-value = 0.04
## alternative hypothesis: two.sided

MCMC Diagnostics

stan_trace(model3.1$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model3.1$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model3.1$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model3.1$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model3.1$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Model3.2 (polynomial model)

Model

f.model3.2 <- bf(MASS ~ 1 + 
                         DEV_TEMP*REGION + 
                         scale(DENSITY, center=TRUE, scale=TRUE) + 
                         #scale(AGE_DAYS, center=TRUE, scale=TRUE) +
                         poly(scale(LENGTH, center=TRUE, scale=TRUE), 2) + 
                         (1| FEMALE) + (1| TANK) + (1|LEVEL) + (1| POPULATION), 
                         family=gaussian()) 

model3.2 <- brm(f.model3.2,
              data = growth3, 
              prior = mass.priors,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              save_pars = save_pars(all=TRUE), 
              sample_prior = "yes",
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95)) 
## Compiling Stan program...
## Start sampling
## Warning: There were 11 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 39 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
saveRDS(model3.2, file="./02.mass_data_analysis_files/model3.2.RDS")

DHARMa

#--- distribution check ---#
pp_check(model3.2, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

#--- DHARMa checks ---#
preds <- posterior_predict(model3.2, ndraws=250, summary=FALSE)
model3.2_resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = growth3$MASS, 
                              fittedPredictedResponse = apply(preds, 2, mean), 
                              integerResponse = 'student')
plot(model3.2_resids) ; testDispersion(model3.2_resids)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.91398, p-value = 0.072
## alternative hypothesis: two.sided

MCMC Diagnostics

stan_trace(model3.2$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model3.2$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model3.2$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model3.2$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model3.2$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

LOO

LOO(model3.1, model3.2) 
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'model3.1'. It is
## recommended to set 'moment_match = TRUE' in order to perform moment matching
## for problematic observations.
## Warning: Found 1 observations with a pareto_k > 0.7 in model 'model3.2'. It is
## recommended to set 'moment_match = TRUE' in order to perform moment matching
## for problematic observations.
## Output of model 'model3.1':
## 
## Computed from 1800 by 1961 log-likelihood matrix.
## 
##          Estimate    SE
## elpd_loo    481.3  57.4
## p_loo       106.6   6.9
## looic      -962.5 114.8
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.1]).
## 
## Pareto k diagnostic values:
##                           Count Pct.    Min. ESS
## (-Inf, 0.69]   (good)     1960  99.9%   207     
##    (0.69, 1]   (bad)         1   0.1%   <NA>    
##     (1, Inf)   (very bad)    0   0.0%   <NA>    
## See help('pareto-k-diagnostic') for details.
## 
## Output of model 'model3.2':
## 
## Computed from 1800 by 1961 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo   -324.2 45.1
## p_loo       101.3  5.6
## looic       648.5 90.3
## ------
## MCSE of elpd_loo is NA.
## MCSE and ESS estimates assume MCMC draws (r_eff in [0.7, 1.1]).
## 
## Pareto k diagnostic values:
##                           Count Pct.    Min. ESS
## (-Inf, 0.69]   (good)     1960  99.9%   290     
##    (0.69, 1]   (bad)         1   0.1%   <NA>    
##     (1, Inf)   (very bad)    0   0.0%   <NA>    
## See help('pareto-k-diagnostic') for details.
## 
## Model comparisons:
##          elpd_diff se_diff
## model3.1    0.0       0.0 
## model3.2 -805.5      48.0

There are issues with this model lets fit it using a Gamma distribution.

Note: The Gamma distribution will have a log link function.

Re-fit model [gamma]

model_gamma.priors <- prior(normal(1.13, 0.20), class="Intercept") +  
  prior(normal(0, 0.25), class="b")

Model3.1_gamma1

Model

f.model3.1_gamma1 <- bf(MASS ~ 1 + 
                         DEV_TEMP*REGION +  
                         scale(DENSITY, center=TRUE, scale=TRUE) + 
                         #scale(AGE_DAYS, center=TRUE, scale=TRUE) +
                         scale(LENGTH, center=TRUE, scale=TRUE) +  
                         (1| FEMALE) + (1| TANK) + (1|LEVEL) + (1| POPULATION), 
                         family=Gamma(link = "log"))

model3.1_gamma1 <- brm(f.model3.1_gamma1,
              data = growth3, 
              prior = model_gamma.priors,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              sample_prior = "yes",
              save_pars = save_pars(all=TRUE),  
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95))  
## Compiling Stan program...
## Start sampling
## Warning: There were 4 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 904 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
saveRDS(model3.1_gamma1, file="./02.mass_data_analysis_files/model3.1_gamma1.RDS")

DHARMa

#--- distribution check ---#
pp_check(model3.1_gamma1, type = 'dens_overlay')
## Using 10 posterior draws for ppc type 'dens_overlay' by default.

#--- DHARMa checks ---#
preds <- posterior_predict(model3.1_gamma1, ndraws=250, summary=FALSE)
model3.1_gamma1_resids <- createDHARMa(simulatedResponse = t(preds), 
                              observedResponse = growth3$MASS, 
                              fittedPredictedResponse = apply(preds, 2, mean), 
                              integerResponse = 'student')
plot(model3.1_gamma1_resids) ; testDispersion(model3.1_gamma1_resids); testDispersion(model3.1_gamma1_resids, plot = F, alternative = "greater")

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1673, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1673, p-value < 2.2e-16
## alternative hypothesis: greater

MCMC Diagnostics

stan_trace(model3.1_gamma1$fit)
## 'pars' not specified. Showing first 10 parameters by default.

stan_ac(model3.1_gamma1$fit) 
## 'pars' not specified. Showing first 10 parameters by default.

stan_rhat(model3.1_gamma1$fit) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_ess(model3.1_gamma1$fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stan_dens(model3.1_gamma1$fit, separate_chains = TRUE)
## 'pars' not specified. Showing first 10 parameters by default.

Partial effects plots

conditional_effects

model3.1_gamma1 |> 
  conditional_effects(spaghetti = TRUE, ndraws=200) 

Model investigation

summary

summary(model3.1_gamma1)
## Warning: There were 4 divergent transitions after warmup. Increasing
## adapt_delta above 0.95 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
##  Family: gamma 
##   Links: mu = log; shape = identity 
## Formula: MASS ~ 1 + DEV_TEMP * REGION + scale(DENSITY, center = TRUE, scale = TRUE) + scale(LENGTH, center = TRUE, scale = TRUE) + (1 | FEMALE) + (1 | TANK) + (1 | LEVEL) + (1 | POPULATION) 
##    Data: growth3 (Number of observations: 1961) 
##   Draws: 2 chains, each with iter = 5000; warmup = 500; thin = 5;
##          total post-warmup draws = 1800
## 
## Group-Level Effects: 
## ~FEMALE (Number of levels: 15) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.02      0.02     0.00     0.06 1.00      925     1402
## 
## ~LEVEL (Number of levels: 3) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.45      0.85     0.50     3.66 1.00     1193     1542
## 
## ~POPULATION (Number of levels: 8) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.05      0.03     0.00     0.12 1.00     1170     1377
## 
## ~TANK (Number of levels: 111) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.08      0.01     0.07     0.10 1.00     1491     1565
## 
## Population-Level Effects: 
##                                     Estimate Est.Error l-95% CI u-95% CI Rhat
## Intercept                               0.98      0.21     0.57     1.38 1.00
## DEV_TEMP30                              0.05      0.03    -0.01     0.10 1.00
## DEV_TEMP31.5                            0.07      0.03     0.01     0.12 1.00
## REGIONleading                           0.03      0.05    -0.07     0.13 1.00
## scaleDENSITYcenterEQTRUEscaleEQTRUE    -0.02      0.01    -0.04    -0.00 1.00
## scaleLENGTHcenterEQTRUEscaleEQTRUE      0.40      0.00     0.39     0.41 1.00
## DEV_TEMP30:REGIONleading               -0.03      0.04    -0.11     0.05 1.00
## DEV_TEMP31.5:REGIONleading             -0.03      0.04    -0.10     0.05 1.00
##                                     Bulk_ESS Tail_ESS
## Intercept                               1656     1524
## DEV_TEMP30                              1305     1631
## DEV_TEMP31.5                            1188     1361
## REGIONleading                           1341     1528
## scaleDENSITYcenterEQTRUEscaleEQTRUE     1603     1667
## scaleLENGTHcenterEQTRUEscaleEQTRUE      1792     1828
## DEV_TEMP30:REGIONleading                1247     1504
## DEV_TEMP31.5:REGIONleading              1223     1451
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape    41.21      1.34    38.64    43.82 1.00     1640     1869
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
#car::Anova(model1.geo.gamma)

R2

model3.1_gamma1 |> bayes_R2(summary = FALSE) |> median_hdci()

tidyMCMC

tidyMCMC(model3.1_gamma1, estimate.method='median', conf.int=TRUE, conf.method='HPDinterval')

gather draws

#model1.re.wo |> get_variables()
model3.1_gamma1 |> gather_draws(`b_.*|sigma`, regex =TRUE) |> 
  median_hdci()
#temp <- int_draws_plotting |>
  #group_by(EXP_GROUP) |> 
  #summarise(Median = median_hdci(MASS))

bayesplot

model3.1_gamma1 |> mcmc_plot(type='intervals')

emmeans - pariwise

model3.1_gamma1 |> emmeans(pairwise ~ REGION*DEV_TEMP, type="response") |> pairs(by="DEV_TEMP") |> summary(infer=TRUE)
model3.1_gamma1 |> emmeans(pairwise ~ REGION*DEV_TEMP, type="response") |> pairs(by="REGION") |> confint()
model3.1_gamma1 |> emmeans(pairwise ~ REGION*DEV_TEMP, type="response") |> regrid() 
##  REGION  DEV_TEMP response lower.HPD upper.HPD
##  core    28.5         2.68      1.62      3.79
##  leading 28.5         2.77      1.74      3.93
##  core    30           2.81      1.69      3.95
##  leading 30           2.83      1.76      4.00
##  core    31.5         2.86      1.82      4.12
##  leading 31.5         2.87      1.76      4.05
## 
## Point estimate displayed: median 
## HPD interval probability: 0.95

probabilities

#model1.geo.gamma |> emmeans(~ REGION | DEV_TEMP, at = list(DEV_TEMP = 31.5, length.out = 10)) |> pairs(type = 'response')
mtsqst <- model3.1_gamma1 |> emmeans(pairwise ~ REGION*DEV_TEMP)
mtsqrt2 <- mtsqst$contrasts |> gather_emmeans_draws()
mtsqrt2 %>% group_by(contrast) %>% dplyr::summarise(Prob = sum(.value<0)/n())

summary figure

var <- get_variables(model3.1_gamma1)
var1 <- get_variables(model3.1_gamma1)[c(1:4,7,8)] 

int_draws_spread <- model3.1_gamma1 |> spread_draws(!!!syms(var1)) 

int_draws_spread2 <- int_draws_spread |> 
  gather_draws(b_Intercept, b_REGIONleading) |> 
  left_join(int_draws_spread, by = c(".chain",".iteration",".draw"))
  
  
int_draws <- int_draws_spread2 |> 
  mutate(x28.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value`, 
                               `.variable` != 'b_Intercept' ~ 
                                 `.value` + b_Intercept), 
         
         x30 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP30, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP30 + `b_DEV_TEMP30:REGIONleading`), 
         
         x31.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP31.5, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP31.5 + `b_DEV_TEMP31.5:REGIONleading`))
  
int_draws_plotting <- int_draws |> 
  pivot_longer(cols = starts_with("x"), 
               names_to = "DEV_TEMP", 
               values_to = "MASS") |> 
  transmute(LATITUDE = case_when(`.variable` == "b_Intercept" ~ "Low latitude", 
                                 `.variable` == "b_REGIONleading" ~ "High latitude"), 
            DEV_TEMP = case_when(DEV_TEMP == 'x28.5' ~ 'DEV_TEMP_28.5', 
                                 DEV_TEMP == 'x30' ~ 'DEV_TEMP_30', 
                                 DEV_TEMP == 'x31.5' ~ 'DEV_TEMP_31.5'),
            MASS = MASS, 
            LATITUDE_B = LATITUDE, 
            DEV_TEMP_B = DEV_TEMP,
            chain = `.chain`, 
            iteration = `.iteration`, 
            draw_n = `.draw`) |> 
  unite("EXP_GROUP",LATITUDE_B, DEV_TEMP_B,sep="_") |> 
  mutate(EXP_GROUP = as.factor(EXP_GROUP)) |> 
  mutate(EXP_GROUP = factor(EXP_GROUP, levels=c("High latitude_DEV_TEMP_28.5","High latitude_DEV_TEMP_30", "High latitude_DEV_TEMP_31.5",
                                                "Low latitude_DEV_TEMP_28.5","Low latitude_DEV_TEMP_30", "Low latitude_DEV_TEMP_31.5")))


mass.plot <- int_draws_plotting |> 
  ggplot(aes(x=EXP_GROUP, y=MASS)) + 
  #geom_hline(yintercept = 0, linetype="dashed", linewidth=1, color="grey58", alpha=0.8) + 
  stat_halfeye(aes(fill = EXP_GROUP, fill_ramp = after_stat(level)), 
               point_interval = median_hdci, 
               .width = c(.66, .89, .95)) + 
  scale_fill_ramp_discrete(na.translate=FALSE, 
                           labels =c("0.95","0.89","0.66"), 
                           name = "Credible interval") +
  scale_fill_manual(values = c("lightskyblue" ,"dodgerblue2", "dodgerblue4","coral", "red2","firebrick4"))+ 
  scale_y_continuous(limits=c(0,2), breaks = seq(0,2, .5))+
  ylab("MASS (g)") + xlab("EXPERIMENTAL GROUP") +
  scale_x_discrete(labels = c("Low latitude_DEV_TEMP_28.5" = paste0("Low latitude 28.5","\u00B0","C"), 
                              "Low latitude_DEV_TEMP_30" = paste0("Low latitude 30","\u00B0","C"),
                              "Low latitude_DEV_TEMP_31.5" = paste0("Low latitude 31.5","\u00B0","C"), 
                              "High latitude_DEV_TEMP_28.5" = paste0("High latitude 28.5","\u00B0","C"),
                              "High latitude_DEV_TEMP_30" = paste0("High latitude 30","\u00B0","C"),
                              "High latitude_DEV_TEMP_31.5" = paste0("High latitude 31.5","\u00B0","C"))) +
  #annotate("text", x=6.8,y=1.4, label = paste0(round(mean(growth3$MASS), 2)," (mm)"), color="grey58") +
  coord_flip() + 
  theme_classic() + 
  guides(fill = "none") + 
  theme(legend.position = c(.15,.86), 
        axis.title.y = element_text(margin = margin(r =0.3, unit = "in"), size = 12), 
        axis.title.x = element_text(margin = margin(t = 0.3, unit="in"), size =12), 
        legend.key = element_rect(color="black", size=1.25), 
        legend.background = element_rect(fill = alpha("blue", 0))); mass.plot

#ggsave(file="./figures/mass.plot.svg",  plot=mass.plot, width=8, height=5, units = "in", dpi=300) 
#ggsave(file="./figures/mass.plot.pdf",  plot=mass.plot, width=8, height=5, units = "in", dpi=300)

just growth

model_gamma.priors <- prior(normal(1.13, 0.20), class="Intercept") +  
  prior(normal(0, 0.25), class="b")

Model3.1_gamma1

Model

f.model3.1.nolength <- bf(MASS ~ 1 + 
                         DEV_TEMP*REGION +  
                         scale(DENSITY, center=TRUE, scale=TRUE) + 
                         #scale(AGE_DAYS, center=TRUE, scale=TRUE) +
                         #scale(LENGTH, center=TRUE, scale=TRUE) +  
                         (1| FEMALE) + (1| TANK) + (1|LEVEL) + (1| POPULATION), 
                         family=Gamma(link = "log"))

model3.1.nolength <- brm(f.model3.1.nolength,
              data = growth3, 
              prior = model_gamma.priors,
              warmup = 500, 
              iter = 5000,
              seed=123, 
              cores=2, 
              sample_prior = "yes",
              save_pars = save_pars(all=TRUE),  
              chains = 2, 
              thin = 5, 
              control = list(adapt_delta=0.95))  
## Compiling Stan program...
## Start sampling
## Warning: There were 2 divergent transitions after warmup. See
## https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## to find out why this is a problem and how to eliminate them.
## Warning: There were 71 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
saveRDS(model3.1.nolength, file="./02.mass_data_analysis_files/model3.1_growth_nolength.RDS")

summary figure

var <- get_variables(model3.1.nolength)
var1 <- get_variables(model3.1.nolength)[c(1:4,6,7)] 

int_draws_spread <- model3.1.nolength |> spread_draws(!!!syms(var1)) 

int_draws_spread2 <- int_draws_spread |> 
  gather_draws(b_Intercept, b_REGIONleading) |> 
  left_join(int_draws_spread, by = c(".chain",".iteration",".draw"))
  
  
int_draws <- int_draws_spread2 |> 
  mutate(x28.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value`, 
                               `.variable` != 'b_Intercept' ~ 
                                 `.value` + b_Intercept), 
         
         x30 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP30, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP30 + `b_DEV_TEMP30:REGIONleading`), 
         
         x31.5 = case_when(`.variable` == 'b_Intercept' ~ 
                                 `.value` + b_DEV_TEMP31.5, 
                             `.variable` == 'b_REGIONleading' ~ 
                                 `.value` + b_Intercept + b_DEV_TEMP31.5 + `b_DEV_TEMP31.5:REGIONleading`))
  
int_draws_plotting <- int_draws |> 
  pivot_longer(cols = starts_with("x"), 
               names_to = "DEV_TEMP", 
               values_to = "MASS") |> 
  transmute(LATITUDE = case_when(`.variable` == "b_Intercept" ~ "Low latitude", 
                                 `.variable` == "b_REGIONleading" ~ "High latitude"), 
            DEV_TEMP = case_when(DEV_TEMP == 'x28.5' ~ 'DEV_TEMP_28.5', 
                                 DEV_TEMP == 'x30' ~ 'DEV_TEMP_30', 
                                 DEV_TEMP == 'x31.5' ~ 'DEV_TEMP_31.5'),
            MASS = MASS, 
            LATITUDE_B = LATITUDE, 
            DEV_TEMP_B = DEV_TEMP,
            chain = `.chain`, 
            iteration = `.iteration`, 
            draw_n = `.draw`) |> 
  unite("EXP_GROUP",LATITUDE_B, DEV_TEMP_B,sep="_") |> 
  mutate(EXP_GROUP = as.factor(EXP_GROUP)) |> 
  mutate(EXP_GROUP = factor(EXP_GROUP, levels=c("High latitude_DEV_TEMP_28.5","High latitude_DEV_TEMP_30", "High latitude_DEV_TEMP_31.5",
                                                "Low latitude_DEV_TEMP_28.5","Low latitude_DEV_TEMP_30", "Low latitude_DEV_TEMP_31.5")))


mass.plot <- int_draws_plotting |> 
  ggplot(aes(x=EXP_GROUP, y=MASS)) + 
  #geom_hline(yintercept = 0, linetype="dashed", linewidth=1, color="grey58", alpha=0.8) + 
  stat_halfeye(aes(fill = EXP_GROUP, fill_ramp = after_stat(level)), 
               point_interval = median_hdci, 
               .width = c(.66, .89, .95)) + 
  scale_fill_ramp_discrete(na.translate=FALSE, 
                           labels =c("0.95","0.89","0.66"), 
                           name = "Credible interval") +
  scale_fill_manual(values = c("lightskyblue" ,"dodgerblue2", "dodgerblue4","coral", "red2","firebrick4"))+ 
  scale_y_continuous(limits=c(0,2), breaks = seq(0,2, .5))+
  ylab("MASS (g)") + xlab("EXPERIMENTAL GROUP") +
  scale_x_discrete(labels = c("Low latitude_DEV_TEMP_28.5" = paste0("Low latitude 28.5","\u00B0","C"), 
                              "Low latitude_DEV_TEMP_30" = paste0("Low latitude 30","\u00B0","C"),
                              "Low latitude_DEV_TEMP_31.5" = paste0("Low latitude 31.5","\u00B0","C"), 
                              "High latitude_DEV_TEMP_28.5" = paste0("High latitude 28.5","\u00B0","C"),
                              "High latitude_DEV_TEMP_30" = paste0("High latitude 30","\u00B0","C"),
                              "High latitude_DEV_TEMP_31.5" = paste0("High latitude 31.5","\u00B0","C"))) +
  #annotate("text", x=6.8,y=1.4, label = paste0(round(mean(growth3$MASS), 2)," (mm)"), color="grey58") +
  coord_flip() + 
  theme_classic() + 
  guides(fill = "none") + 
  theme(legend.position = c(.15,.86), 
        axis.title.y = element_text(margin = margin(r =0.3, unit = "in"), size = 12), 
        axis.title.x = element_text(margin = margin(t = 0.3, unit="in"), size =12), 
        legend.key = element_rect(color="black", size=1.25), 
        legend.background = element_rect(fill = alpha("blue", 0))); mass.plot